(*
  HDFS Digital Logic Hardware Design (HDFS.dll)
  Copyright (C) 2006 Andy Ray.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*)

(** C, C++, C# and Simulink simulation model generator *)
(* Thanks to John White for providing the simulink model generator *)
module DigitalLogic.C

open DigitalLogic
open Numeric.Ops
open Numeric.Conversions
open Simulator
open Circuit
open Signal
open List
open System.Reflection

(** enumeration to select between types of C model generated *)
type c_gen_t = GenC | GenCsharp | GenCpp | GenCsimulink

(** Given an output channel, circuit name and circuit datatype writes a C, C++ or C# simulation model *)
let write mode f name (circuit : Circuit) = 
  let t0 = System.DateTime.Now in
  let timing s t0 t1 = System.Console.WriteLine("{0}: {1}", s, t1-t0) in
  let debug = true in

  let os = output_string f in
  let soi = string_of_int in

  let hex_of_int i = 
    match i with
    | 0  -> "0"  | 1  -> "1" | 2  -> "2" | 3  -> "3"
    | 4  -> "4"  | 5  -> "5" | 6  -> "6" | 7  -> "7"
    | 8  -> "8"  | 9  -> "9" | 10 -> "a" | 11 -> "b"
    | 12 -> "c"  | 13 -> "d" | 14 -> "e" | 15 -> "f"
    | _ -> failwith ("hex_of_int: Invalid hex char: " ^ (string_of_int i)) in
  let int_to_hex d = 
    let rec build v b =
      if 8 = b then ""
      else (build (v lsr 4) (b+1)) ^ (hex_of_int (v &&& 15)) in
    build d 0 in
  
  let inputs = circuit.Inputs in
  let outputs = circuit.Outputs in
  let wires = circuit.Wires in
  let regs = circuit.Regs in
  let mems = circuit.Memories in
  let logic = circuit.Logic in
  
  let named_wires = filter (fun x -> wire_name x <> "") wires in

  let dependants (signal : Signal) =
    match signal.signal with
    | Signal_reg _ -> []
    | Signal_mem(_,_,_,_,_,w,we,data,r) -> [w;we;data;r] (* XXX the read address cannot be dependant on the output.  The write port must surely also be scheduled??? 
                                                            I think what we need to do is schedule the read portion, then update the write sycnronously along with
                                                            registers at the end of the cycle XXXX  Should work out a testcase for this. 
                                                            Update: the bug turned up in some code.  Adding all signals to the schedule makes it work.  But, this
                                                            will fail if theres a path from q to any of w, we or data, which is incorrect.  What needs to happen
                                                            is we schedule the read and execute it as usual.  Then at the end (when everything else is updated
                                                            (except registers) do the write.  *)
    | Signal_wire(_,_,_,d) -> if !d = empty then [] else [!d] 
    | _ -> signal.dependants in

  let schedule = scheduler dependants (logic @ wires @ mems) (inputs @ regs) in

  (* Check that clocks and resets are simple wire (or chains thereof) or empty.  If not write warning that the circuit will not simulate as expected *)
  let rec check_clk_rst (s : Signal) = 
    if s.IsEmpty then true
    else if s.IsWire then
      check_clk_rst (wire_connection s)
    else false
  in
  let check_clk s = 
    if not (check_clk_rst s) then
      os ("*** WARNING: The clock signal " ^ s.name ^ " is driven by logic.\nThis is not supported by the simulator so simulation may not be correct.\n")
  in
  let check_rst s = 
    if not (check_clk_rst s) then
      os ("*** WARNING: The reset signal " ^ s.name ^ " is driven by logic.\nThis is not supported by the simulator so simulation may not be correct.\n")
  in

  (* create a map from uid, to size of each offset for each element *)
  let offset = ref 0 in
  let add_data uid words map = 
    let map = Map.add uid (!offset, words) map in
    offset := !offset + words;
    map 
  in

  let make_data_map map (signal : Signal) = 
    let words bits = (bits + 31) / 32 in
    match signal.signal with
    | Signal_empty    -> 
      map
    | Signal_const    (a,w,c) -> 
      add_data signal.uid (words w) map
    | Signal_binop    (a,w,op,s0,s1) -> 
      add_data signal.uid (words w) map
    | Signal_unop     (a,w,op,s) -> 
      add_data signal.uid (words w) map
    | Signal_wire     (a,w,n,data) -> 
      add_data signal.uid (words w) map
    | Signal_mux      (a,w,sel,data) -> 
      add_data signal.uid (words w) map
    | Signal_select   (a,hi,lo,s) -> 
      add_data signal.uid (words (hi-lo+1)) map
    | Signal_reg      (a,w,clk,rst,rstval,ena,data) -> 
      check_clk clk;
      check_rst rst;
      add_data signal.uid (words w) map
    | Signal_mem      (a,dw,aw,size,clk,w,we,data,r) -> 
      check_clk clk;
      add_data signal.uid (words dw) map
    | Signal_behave   (a,w,b,dl) -> 
      add_data signal.uid (words w) map
    | Signal_inst     _ ->
      failwith "Instantiation not supported in simulation"
    | Signal_tri     _ ->
      failwith "Tristates not supported in simulation"
  in
  
  let make_memories_map map (signal : Signal) = 
    let words bits = (bits + 31) / 32 in
    match signal.signal with
    | Signal_mem(a,dw,aw,size,clk,w,we,data,r) -> 
      add_data signal.uid ((words dw) * size) map
    | _ -> failwith "Expected a memory"
  in

  let data_map = fold_left make_data_map Map.empty (logic @ inputs @ outputs @ wires @ regs @ mems) in
  let seq_data_map = fold_left make_data_map Map.empty regs in
  let mem_data_map = fold_left make_memories_map Map.empty mems in

  let find (signal : Signal) = Map.find signal.uid data_map in
  let find_seq (signal : Signal) = Map.find signal.uid seq_data_map in
  let find_mem (signal : Signal) = Map.find signal.uid mem_data_map in

  let mask32 l =
    let md = l mod 32 in
    let md = if md = 0 then 32 else md in
    (-1) lsr (32-md) in
    
  let mem_at offset = ("mem[" ^ soi offset ^ "]") in
    
  let write_mask offset width bits =
    if bits % 32 <> 0 then
      os ("  mem[" ^ soi (offset + width - 1) ^ "] &= 0x" ^ int_to_hex (mask32 bits) ^ ";\n") in
    
  let write_copy offsetl offsetr width bits = 
    for i=0 to width-1 do
      os ("  mem[" ^ soi (offsetl + i) ^ "] = mem[" ^ soi (offsetr + i) ^ "];\n");
    done;
    write_mask offsetl width bits in

  let rec compile_seq_reset (signal : Signal) = 
    match signal.signal with
    | Signal_reg(a,w,clk,rst,rstval,ena,data) -> 
      if rst <> empty then
      begin
        let offset, width = find signal in
        let seq_offset, _ = find_seq signal in
        if rstval = empty then
          for i=0 to width-1 do
            os ("  " ^ mem_at (offset + i) ^ " = 0x00000000;\n");
            os ("  " ^ mem_at (seq_offset + i) ^ " = 0x00000000;\n")
          done
        else  
        begin
          let rst_offset, _ = find rstval in
          write_copy offset rst_offset width w;
          write_copy seq_offset rst_offset width w
        end
      end
    | _ -> failwith "Expecting a register"
  in

  (* generate the simulator *)
  if mode = GenCsharp then (
    os (
"using System;

using HDFS_Int = System.UInt32;
using HDFS_Int64 = System.UInt64;

namespace HDFS_CSim_" ^ name ^ " {

public enum HDFS_PortType { Input, Wire, Output }

public class HDFS_Port {

public string name;
public int bits;
public int data;
public HDFS_PortType type;

public HDFS_Port(string _name, int _data, int _bits, HDFS_PortType _type) {
  name = _name;
  bits = _bits;
  data = _data;
  type = _type;
}

}

public class HDFS_Simulator_" ^ name ^ " {

public HDFS_Int [] mem;
public HDFS_Int mem_size;
public HDFS_Port [] ports;
public HDFS_Int num_ports;

")
  ) else (
    if mode = GenCsimulink then (
      os (
"#define S_FUNCTION_NAME " ^ name ^ "
#define S_FUNCTION_LEVEL 2
#include \"simstruc.h\"
")
    );
    os ("#include <hdfs_c_sim.h>\n\n")
  );
  
  (* reset *)
  if mode = GenCsharp then (
    os (
"public void Reset() {
  HDFS_Int i;
");
    iter (fun x -> compile_seq_reset x) regs;
    os "}\n\n"
  ) else (
    os (
"void HDFS_Reset_" ^ name ^ "(HDFS_Simulator *sim) {
  HDFS_Int *mem = sim->mem;
  HDFS_Int i;
");
    iter (fun x -> compile_seq_reset x) regs;
    os "}\n\n"
  );
  
  (* cleanup *)
  if mode <> GenCsharp then (
    os (
"void HDFS_Destroy_" ^ name ^ "(HDFS_Simulator *sim) {
  free(sim->mem);
  free(sim->ports);
  free(sim);
}

")
  );


  (* Sim creation *)
  if mode = GenCsharp then (
    os (
"public HDFS_Simulator_" ^ name ^ "() {
  HDFS_Int i;
  mem_size = " ^ soi !offset ^ ";
  num_ports = " ^ soi (length inputs + length outputs + length named_wires) ^ ";
  mem = new HDFS_Int [mem_size];
  ports = new HDFS_Port [num_ports];
")
  ) else (
    os (
"HDFS_Simulator *HDFS_Create_" ^ name ^ "() {
  HDFS_Simulator *sim = (HDFS_Simulator *) malloc(sizeof(HDFS_Simulator));
  HDFS_Int i;
  sim->mem_size = " ^ soi !offset ^ ";
  sim->num_ports = " ^ soi (length inputs + length outputs + length named_wires) ^ ";
  sim->mem = (HDFS_Int *) malloc(sizeof(HDFS_Int) * sim->mem_size);
  sim->ports = (HDFS_Port *) malloc(sizeof(HDFS_Port) * sim->num_ports);
")
  );
  let port_no = ref 0 in
  let ss = if mode = GenCsharp then "" else "sim->" in
  let write_port_defn ptype s = 
    if mode = GenCsharp then (
      os ("  ports[" ^ soi !port_no ^ "] = new HDFS_Port(\"" ^ wire_name s ^ "\", " ^ soi (fst (find s)) ^ ", " ^ soi (width s) ^ ", HDFS_PortType." ^ ptype ^ ");\n");
      port_no := !port_no + 1
    ) else (
      os ("  sim->ports[" ^ soi !port_no ^ "].name = \"" ^ wire_name s ^ "\";\n");
      os ("  sim->ports[" ^ soi !port_no ^ "].data = " ^ soi (fst (find s)) ^ ";\n");
      os ("  sim->ports[" ^ soi !port_no ^ "].bits = " ^ soi (width s) ^ ";\n");
      os ("  sim->ports[" ^ soi !port_no ^ "].type = " ^ ptype ^ ";\n");
      port_no := !port_no + 1
    )
  in

  iter (write_port_defn "Input") inputs;
  iter (write_port_defn "Output") outputs;
  iter (write_port_defn "Wire") named_wires;
  
  os (
"  for (i=0; i<" ^ ss ^ "mem_size; i++) " ^ ss ^ "mem[i] = 0;
");  
  List.iter (fun (s : Signal) ->
    match s.signal with
    | Signal_const    (a,w,c) -> 
      let offset, words = find s in
      let a = Array.create words 0 in
      if debug then os ("  // const: " ^ c ^ "\n");
      int_array_of_bin_str a w c;
      for i=0 to (words-1) do
        os ("  " ^ ss ^ "mem[" ^ soi (offset + i) ^ "] = 0x" ^ int_to_hex a.(i) ^ ";\n");
      done
    | _ -> ()
  ) logic;
  if mode = GenCsharp then (
    os (
"  Reset();
}

");
  ) else (
    os (
"  HDFS_Reset_" ^ name ^ "(sim);
  return sim;
}

");
  );

  let hex_of_const32 (s : Signal) = 
    match s.signal with
    | Signal_const    (a,w,c) -> 
      let words = ((String.length c) + 31) / 32 in
      if words <> 1 then failwith "Behave error: const can only be 32 bits";
      let a = Array.create words 0 in
      int_array_of_bin_str a w c;
      int_to_hex a.(0)
    | _ -> failwith "Expecting constant\n"
  in

  (* port finding *)
  if mode = GenCsharp then (
    os (
"public HDFS_Port Port(string name) {
  HDFS_Int i;
  for (i=0; i<num_ports; i++) 
    if (name == ports[i].name) return ports[i];
  return null;
}

");
  ) else (
    os (
"HDFS_Port *HDFS_Port_" ^ name ^ "(HDFS_Simulator *sim, char *name) {
  HDFS_Int i;
  for (i=0; i<sim->num_ports; i++) 
    if (0 == strcmp(name, sim->ports[i].name)) return sim->ports + i;
  return 0;
}

");
  );

  (* simulation cycle *)

  let rec behave_tasks ind tgt_sig code = iter (behave_task ("  " ^ ind) tgt_sig) code
  and behave_task ind tgt_sig code = 
    match code with
    | B_if(cond, on_true, on_false) -> 
      let cond,_ = find cond in
      os (ind ^ "if (" ^ mem_at cond ^ " != 0) {\n");
      behave_tasks ind tgt_sig on_true;
      os (ind ^ "} else {\n");
      behave_tasks ind tgt_sig on_false;
      os (ind ^ "}\n")
      
    | B_switch(cond, cases) -> 
      (* XXX NO MORE THAN 32 bits XXX *)
      let cond,c_width = find cond in
      if c_width > 1 then failwith "Behave case: More than 32 bits...to be fixed";
      os (ind ^ "switch (" ^ mem_at cond ^ ") {\n");
      iter (fun (c_match, c_code) -> 
        os (ind ^ "case 0x" ^ hex_of_const32 c_match ^ ":\n");
        behave_tasks ind tgt_sig c_code;
        os ("  " ^ ind ^ "break;\n")
      ) cases;
      os (ind ^ "}\n")
      
    | B_assign(B_assign_tgt(_,_,_,_,_), expr) -> 
      let wid = width expr in
      let tgt,dwid = find tgt_sig in
      let expr,_ = find expr in
      for i=0 to dwid-1 do
        os (ind ^ "mem[" ^ soi (tgt + i) ^ "] = mem[" ^ soi (expr + i) ^ "];\n");
      done;
      if wid % 32 <> 0 then (* mask *)
        os (ind ^ "mem[" ^ soi (tgt + dwid - 1) ^ "] &= 0x" ^ int_to_hex (mask32 wid) ^ ";\n") 
  in

  let compile (signal : Signal) = 
      
    let write_bop_simple op offset offset0 offset1 width bits = 
      for i=0 to width-1 do
        os ("  mem[" ^ soi (offset + i) ^ "] = mem[" ^ soi (offset0 + i) ^ "] " ^ op ^ " mem[" ^ soi (offset1 + i) ^ "];\n");
      done;
      write_mask offset width bits in

    (* r = a * b *)
    let array_mul
      signed                  (* signed/unsigned multiplication *)
      words_r words_a words_b (* words in args *)
      bits_r bits_a bits_b    (* bits in args *)
      r a b                   (* offset of result and args in memory array *)
      = 
      (* return a string giving the value of the look up on the arg *)
      let access a words bits i =
        if i = words-1 then
          (* last word, sign extend if necessary *)
          if (signed = Signed) && (bits % 32 <> 0) then (
            ("((" ^ mem_at (a + i) ^ " & 0x" ^ int_to_hex (1 <<< ((bits-1)%32)) ^ ") != 0 ? " ^ 
              mem_at (a + i) ^ " | 0x" ^ int_to_hex (0xffffffff <<< (bits%32)) ^ " : " ^ 
              mem_at (a + i) ^ ")")
          ) else
            (mem_at (a + i))
        else if i >= words then
          if (signed = Signed) then 
            ("((" ^ mem_at (a + words - 1) ^ " & 0x" ^ int_to_hex (1 <<< ((bits-1)%32)) ^ ") != 0 ? 0xffffffff : 0x00000000)") 
          else "0x00000000"
        else
          (mem_at (a + i))
      in
      let access_a = access a words_a bits_a in
      let access_b = access b words_b bits_b in
      
      if words_r = 1 then (
        os ("  " ^ mem_at r ^ " = " ^ access_a 0 ^ " * " ^ access_b 0 ^ ";")  // ...SIGN EXTEND...
      ) else (

        for i = 0 to words_r - 1 do 
          os ("  " ^ mem_at (r + i) ^ " = 0;\n")
        done;
        os ("  temp64 = 0;\n");
        for i = 0 to words_r - 1 do 
          let ib = ref 0 in
          for ia = i downto 0 do 
            os ("  temp64 = ((HDFS_Int64) " ^ access_a ia ^ ") * ((HDFS_Int64) " ^ access_b !ib ^ ");\n");
            for ic = i to words_r - 1 do
              os ("  temp64 += (HDFS_Int64) " ^ mem_at (r+ic) ^ ";\n");
              os ("  " ^ mem_at (r + ic) ^ " = (HDFS_Int) (temp64 & 0xffffffff);\n");
              os ("  temp64 = temp64 >> 32;\n");
            done;
            ib := !ib + 1;
          done;
        done
      )
    in   

    match signal.signal with
    
    | Signal_empty 
    | Signal_const    (_) -> ()
    
    | Signal_binop    (a,w,op,s0,s1) -> 
    begin
      let offset0,_ = find s0 in
      let offset1,_ = find s1 in
      let w0 = width s0 in
      let w1 = width s1 in
      let offset,width = find signal in
      match op with
      | B_add ->
        if debug then os ("  // addition (" ^ soi w  ^ ")\n");
        if w <= 32 then
          write_bop_simple "+" offset offset0 offset1 width w
        else
        begin
          os ("  temp = 0;\n");
          for i=0 to width - 1 do
            // Avoid 64 bit arithmetic for now.
            os ("  temp = (" ^ mem_at (offset0 + i) ^ " & 0xFFFF) + (" ^ mem_at (offset1 + i) ^ " & 0xFFFF) + temp;\n");
            os ("  " ^ mem_at (offset + i) ^ " = temp & 0xFFFF;\n");
            os ("  temp = temp >> 16;\n");
            os ("  temp = ((" ^ mem_at (offset0 + i) ^ " >> 16) & 0xFFFF) + ((" ^ mem_at (offset1 + i) ^ " >> 16) & 0xFFFF) + temp;\n");
            os ("  " ^ mem_at (offset + i) ^ " |= (temp & 0xFFFF) << 16;\n");
            os ("  temp = temp >> 16;\n");
          done;
          write_mask offset width w
        end

      | B_sub ->
        if debug then os ("  // subtraction (" ^ soi w  ^ ")\n");
        if w <= 32 then
          write_bop_simple "-" offset offset0 offset1 width w
        else
        begin
          os ("  temp = 0;\n");
          for i=0 to width - 1 do
            // Avoid 64 bit arithmetic for now.
            os ("  temp = (" ^ mem_at (offset0 + i) ^ " & 0xFFFF) - (" ^ mem_at (offset1 + i) ^ " & 0xFFFF) - temp;\n");
            os ("  " ^ mem_at (offset + i) ^ " = temp & 0xFFFF;\n");
            os ("  temp = (temp >> 16) & 0x1;\n");
            os ("  temp = ((" ^ mem_at (offset0 + i) ^ " >> 16) & 0xFFFF) - ((" ^ mem_at (offset1 + i) ^ " >> 16) & 0xFFFF) - temp;\n");
            os ("  " ^ mem_at (offset + i) ^ " |= (temp & 0xFFFF) << 16;\n");
            os ("  temp = (temp >> 16) & 0x1;\n");
          done;
          write_mask offset width w
        end
        
      | B_mulu ->
        array_mul Unsigned ((w+31)/32) ((w0+31)/32) ((w1+31)/32) w w0 w1 offset offset0 offset1;
        write_mask offset width w
        
      | B_muls -> 
        array_mul Signed ((w+31)/32) ((w0+31)/32) ((w1+31)/32) w w0 w1 offset offset0 offset1;
        write_mask offset width w

      | B_eq ->
        if debug then os ("  // equality (" ^ soi w0 ^ ")\n");
        for i=0 to ((w0+31)/32) - 1 do
          if i=0 then
            os ("  " ^ mem_at offset ^ " = " ^ mem_at (offset0 + i) ^ " == " ^ mem_at (offset1 + i) ^ " ? (HDFS_Int) 1 : (HDFS_Int) 0;\n")
          else
            os ("  " ^ mem_at offset ^ " &= " ^ mem_at (offset0 + i) ^ " == " ^ mem_at (offset1 + i) ^ " ? (HDFS_Int) 1 : (HDFS_Int) 0;\n")
        done;
      
      | B_lt ->
        if debug then os ("  // comparison (" ^ soi w0 ^ ")\n");
        if w0 <= 32 then
          os ("  " ^ mem_at offset ^ " = " ^ mem_at offset0 ^ " < " ^ mem_at offset1 ^ " ? (HDFS_Int) 1 : (HDFS_Int) 0;\n")
        else
          let words = (w0+31)/32 in
          let range0 = rev [ offset0 .. (offset0 + words - 1) ] in
          let range1 = rev [ offset1 .. (offset1 + words - 1) ] in
          let test o0 o1 = "\n    " ^ mem_at o0 ^ " < " ^ mem_at o1 ^ " ? (HDFS_Int) 1 : " ^ mem_at o0 ^ " > " ^ mem_at o1 ^ " ? (HDFS_Int) 0 : " in
          os ("  " ^ mem_at offset ^ " = " ^ (fold_left (fun acc (o0,o1) -> acc ^ (test o0 o1)) "" (combine range0 range1)) ^ "(HDFS_Int) 0;\n")
      
      | B_cat -> 
        if debug then os ("  // concat (" ^ soi w0 ^ " & " ^ soi w1 ^ ")\n");
        let words_a = (w0 + 31) / 32 in
        let words_b = (w1 + 31) / 32 in
        if (w1 % 32) = 0 then 
        begin
          for i=0 to words_b - 1 do 
            os ("  " ^ mem_at (offset + i) ^ " = " ^ mem_at (offset1 + i) ^ ";\n")
          done;
          for i=0 to words_a - 1 do 
            os ("  " ^ mem_at (offset + words_b + i) ^ " = " ^ mem_at (offset0 + i) ^ ";\n")
          done
        end
        else 
        begin
          let shift_left = w1 % 32 in
          let shift_right = 32-shift_left in
          let xi = ref (words_b-1) in
          let ai = ref 0 in
          let bits_a = ref w0 in
          (* Copy b (at least 1 word will be copied) *)
          for bi=0 to words_b-1 do 
            os ("  " ^ mem_at (offset + bi) ^ " = " ^ mem_at (offset1 + bi) ^ ";\n")
          done;
          write_mask offset words_b w1;
          while (!bits_a > 0) do
            (* can fit the remaining bits into the last word *)
            if (!bits_a <= shift_right) then 
            begin
                os ("  " ^ mem_at (offset + !xi) ^ " |= " ^ mem_at (offset0 + !ai) ^ " << " ^ soi shift_left ^ ";\n");
                bits_a := 0
            end
            (* at least one complete word to copy, or remaining bits dont fit into just the current word *)
            else 
            begin
                os ("  " ^ mem_at (offset + !xi) ^ " |= " ^ mem_at (offset0 + !ai) ^ " << " ^ soi shift_left ^ ";\n");
                os ("  " ^ mem_at (offset + !xi + 1) ^ " = " ^ mem_at (offset0 + !ai) ^ " >> " ^ soi shift_right ^ ";\n");
                bits_a := !bits_a - 32;
                xi := !xi + 1;
                ai := !ai + 1
            end
          done
        end;
        write_mask offset width w

      | B_and -> 
        if debug then os ("  // logical and (" ^ soi w  ^ ")\n");
        write_bop_simple "&" offset offset0 offset1 width w
      | B_or -> 
        if debug then os ("  // logical or (" ^ soi w  ^ ")\n");
        write_bop_simple "|" offset offset0 offset1 width w
      | B_xor -> 
        if debug then os ("  // logical xor (" ^ soi w  ^ ")\n");
        write_bop_simple "^" offset offset0 offset1 width w
    end
    
    | Signal_unop     (a,w,op,s) -> 
      let offset,width = find signal in
      let doffset,_ = find s in
      if debug then os ("  // logical not (" ^ soi w  ^ ")\n");
      for i=0 to width-1 do
        os ("  mem[" ^ soi (offset + i) ^ "] = ~ mem[" ^ soi (doffset + i) ^ "];\n");
      done;
      write_mask offset width w

    | Signal_wire     (a,w,n,data) -> 
      let offset,width = find signal in
      if debug then os ("  // wire (" ^ soi w  ^ ")\n");
      if !data = empty then 
        write_mask offset width w
      else
        let doffset, _ = find !data in
        write_copy offset doffset width w

    | Signal_mux      (a,w,sel,dlist) -> 
      let len = length dlist in
      if debug then os ("  // mux (" ^ soi len  ^ ")\n");
      let offset, width = find signal in
      let sel_offset, sel_width = find sel in
      let opts = mapi (fun i x -> i, find x) dlist in
      os ("  switch (" ^ mem_at sel_offset ^ ") {\n");
      List.iter (fun (idx, (data_offset, data_width)) ->
        if idx = (len-1) then os ("  default:\n")
        else os ("  case " ^ soi idx ^ ":\n");
        write_copy offset data_offset width w;
        os ("  break;\n");
      ) opts;
      os "  }\n"

    | Signal_select   (a,msb,lsb,s) -> 
      let offset,width = find signal in
      let doffset,_ = find s in
      
      let lsw = lsb / 32 in
      let msw = (msb - lsb) / 32 in
      let shift_right = lsb % 32 in
      
      if debug then os ("  // select (" ^ soi msb ^ ":" ^ soi lsb ^ ")\n");      
      if (msb / 32) = (lsb / 32) then (* msb and lsb in same word *)
        os ("  " ^ mem_at offset ^ " = " ^ mem_at (doffset + lsw) ^ " >> " ^ soi shift_right ^ ";\n")
      else if shift_right = 0 then 
      begin
        for i = 0 to msw do
          os ("  " ^ mem_at (offset + i) ^ " = " ^ mem_at (doffset + lsw + i) ^ ";\n")
        done
      end
      else 
      begin
        let shift_left  = 32 - shift_right in
        let smsw = msw - 1 in
        for i = 0 to smsw do
          os ("  " ^ mem_at (offset + i) ^ " = (" ^ 
            mem_at (doffset + lsw + i) ^ " >> " ^ soi shift_right ^ ") | (" ^ 
            mem_at (doffset + lsw + i + 1) ^ " << " ^ soi shift_left ^ ");\n");
        done;
        (* NOTE: this can reach outside memory (reserved for this signal) but only when the data isn't needed
           The temp fix is to ensure that memory location is valid for some other signal but it aint the "right" way *)
        os ("  " ^ mem_at (offset + msw) ^ " = (" ^ 
          mem_at (doffset + lsw + msw) ^ " >> " ^ soi shift_right ^ ") | (" ^ 
          mem_at (doffset + lsw + msw + 1) ^ " << " ^ soi shift_left ^ ");\n")
      end;
      write_mask offset width (msb-lsb+1)

    (* Registers must update in two parts.  First we copy the data to the seq_data_map.  
       Later we'll copy to the actual value (which is read by other tasks).
       This is because registers which read other registers are not scheduled.  
       This isnt so with memories as they are read asycnhronously (and are therefore sheduled) *)
      
    | Signal_reg      (a,w,clk,rst,rstval,ena,data) -> 
      let offset, width = find_seq signal in
      let data_offset, _ = find data in
      if debug then os ("  // register (" ^ soi w ^ ")\n");      
      if ena = empty then 
        write_copy offset data_offset width w
      else
        let ena_offset, _ = find ena in 
        os ("  if (" ^ mem_at ena_offset ^ " != 0) {\n");
        write_copy offset data_offset width w;
        os ("  }\n")
      
    | Signal_mem      (a,dw,aw,size,clk,w,we,data,r) -> 
      if debug then os ("  // memory (" ^ soi dw ^ ", " ^ soi size ^ ")\n");      
      let dwords = (dw + 31) / 32 in
      let q,_ = find signal in
      let w,_ = find w in
      let we,_ = find we in
      let data,_ = find data in
      let r,_ = find r in
      let mem,_ = find_mem signal in
      (* read *)
      os ("  temp = " ^ mem_at r ^ ";\n");
      os ("  if (temp < " ^ soi size ^ ") {\n");
      if dwords <> 1 then os ("  temp *= " ^ soi dwords ^ ";\n");
      for i=0 to dwords-1 do
        os ("  mem[" ^ soi (q + i) ^ "] = mem[temp + " ^ soi (mem + i) ^ "];\n");
      done;
      write_mask q dwords dw;
      os ("  }\n");
      (* write *)
      os ("  if (" ^ mem_at we ^ " != 0) {\n");
      os ("  temp = " ^ mem_at w ^ ";\n");
      os ("  if (temp < " ^ soi size ^ ") {\n");
      if dwords <> 1 then os ("  temp *= " ^ soi dwords ^ ";\n");
      for i=0 to dwords-1 do
        os ("  mem[temp + " ^ soi (mem + i) ^ "] = mem[" ^ soi (data + i) ^ "];\n");
      done;
      if dw % 32 <> 0 then (* mask *)
        os ("  mem[temp + " ^ soi (mem + dwords - 1) ^ "] &= 0x" ^ int_to_hex (mask32 dw) ^ ";\n");
      os ("  }\n");
      os ("  }\n");

    | Signal_behave   (a,w,b,dl) -> 
      if debug then os ("  // Behave (" ^ soi w ^ ")\n");      
      behave_tasks "" signal b 
      
    | Signal_inst     (a,n,g,io,i,o) ->
      failwith "Instantiation nodes not supported in simulation"
      
    | Signal_tri     _ ->
      failwith "Tristate nodes not supported in simulation"
      
  in
  
  let compile_seq (signal : Signal) = 
    match signal.signal with
    | Signal_reg      (_) 
    | Signal_mem      (_) -> 
      let bits = width signal in
      if debug then os ("  // register (" ^ soi bits ^ ")\n");      
      let q0,width = find_seq signal in
      let q1,_ = find signal in
      write_copy q1 q0 width bits
    | _ -> failwith "Expecting register or memory"
  in
  
  if mode = GenCsharp then (
    os (
"public void Cycle() {\n
  HDFS_Int temp;
  HDFS_Int64 temp64;

");
  ) else (
    os (
"void HDFS_Cycle_" ^ name ^ "(HDFS_Simulator *sim) {\n
  HDFS_Int *mem = sim->mem;
  HDFS_Int temp;\n
  HDFS_Int64 temp64;\n

");
  );
  
  if debug then os ("  // Inputs\n\n");
  iter compile inputs;
  if debug then os ("\n  // Scheduled logic\n\n");
  iter compile schedule;
  if debug then os ("\n  // Outputs\n\n");
  iter compile outputs;
  if debug then os ("\n  // Registers (calculate next value)\n\n");
  iter compile regs;
  if debug then os ("\n  // Registers (set next value)\n\n");
  iter compile_seq regs;
  os ("}\n\n");
  
  (* write a cpp class wrapper *)
  if mode = GenCpp then
  begin
    let cname = "HDFS_Simulator_" ^ name in
    os (
"class " ^ cname ^ " {
public:
  HDFS_Simulator *sim;
  " ^ cname ^ "() { sim = HDFS_Create_" ^ name ^ "(); }
  ~" ^ cname ^ "() { HDFS_Destroy_" ^ name ^ "(sim); }
  void Reset() { HDFS_Reset_" ^ name ^ "(sim); }
  void Cycle() { HDFS_Cycle_" ^ name ^ "(sim); }
  HDFS_Port * Ports() { return sim->ports; }
  HDFS_Int NumPorts() { return sim->num_ports; }
  HDFS_Port * operator [] (char *name) { return HDFS_Port_" ^ name ^ "(sim, name); }
");
  os ("};\n\n");
  end;

  if mode = GenCsharp then
  begin
    os ("}\n");
    os ("}\n");
  end;

  if mode = GenCsimulink then
  begin
    os(
"
#define nin "^ soi (length inputs - 2) ^"
#define nout "^ soi (length outputs) ^"
#define ntot "^ soi (length inputs + length outputs) ^"

static void mdlInitializeSizes(SimStruct *S)
{
  int i;
  ssSetNumSFcnParams(S, 0);
  if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S)) return;
  ssSetNumContStates(S, 0);
  ssSetNumDiscStates(S, 1);
  if (!ssSetNumInputPorts(S, nin)) return;
  for (i=0; i<nin; i++) {
    ssSetInputPortWidth(S, i, 1);
    ssSetInputPortDirectFeedThrough(S, i, 0);
  }
  if (!ssSetNumOutputPorts(S, " ^ soi (length outputs) ^ ")) return;
  for (i=0; i<nout; i++) {
    ssSetOutputPortWidth(S, i, 1);
  }
  ssSetNumSampleTimes(S, 1);
  ssSetNumRWork(S, 0);
  ssSetNumIWork(S, 0);
  ssSetNumPWork(S, 0);
  ssSetNumModes(S, 0);
  ssSetNumNonsampledZCs(S, 0);
  ssSetOptions(S, SS_OPTION_EXCEPTION_FREE_CODE);
}

static void mdlInitializeSampleTimes(SimStruct *S)
{
  ssSetSampleTime(S, 0, INHERITED_SAMPLE_TIME);
  ssSetOffsetTime(S, 0, 0.0);
  ssSetModelReferenceSampleTimeDefaultInheritance(S);
}

#define MDL_INITIALIZE_CONDITIONS

static void mdlInitializeConditions(SimStruct *S)
{
  HDFS_Simulator *sim;
  sim = HDFS_Create_" ^ name ^ "();
  ssSetUserData(S, sim);
}

static void mdlOutputs(SimStruct *S, int_T tid)
{
  HDFS_Simulator *sim;
  int i;
  real_T *y;

  UNUSED_ARG(tid); /* not used in single tasking mode */

  sim = (HDFS_Simulator *)ssGetUserData(S);
  for (i=0;i<nout;i++) {
    y=ssGetOutputPortRealSignal(S,i);
    y[0] = sim->mem[sim->ports[nin+2+i].data];
  }
}

#define MDL_UPDATE
static void mdlUpdate(SimStruct *S, int_T tid)
{
  HDFS_Simulator *sim;
  int i;
  int memloc;
  InputRealPtrsType u;

  UNUSED_ARG(tid); /* not used in single tasking mode */

  sim = (HDFS_Simulator *)ssGetUserData(S);
  for (i=0;i<nin;i++) {
    u = ssGetInputPortRealSignalPtrs(S,i);
    memloc = sim->ports[i+2].data;
    (sim->mem)[memloc] = *u[0];
  }
  HDFS_Cycle_" ^ name ^ "(sim);
}

static void mdlTerminate(SimStruct *S)
{
  HDFS_Simulator *sim;
  sim = (HDFS_Simulator *)ssGetUserData(S);
  HDFS_Destroy_" ^ name ^ "(sim);
}

#ifdef  MATLAB_MEX_FILE    /* Is this file being compiled as a MEX-file? */
#include \"simulink.c\"      /* MEX-file interface mechanism */
#else
#include \"cg_sfun.h\"       /* Code generation registration function */
#endif

"
);
  end;

  timing "Wrote C simulation in" t0 System.DateTime.Now
  
(** Writes a C simulation model *)
let write_c = write GenC
(** Writes a C++ simulation model *)
let write_cpp = write GenCpp
(** Writes a C# simulation model *)
let write_cs = write GenCsharp
(** Writes a Simulink model in C *)
let write_csimulink = write GenCsimulink

(** Loads a C# simulation and constructs a simulator object *)
let create_csim path name = 
  try
    let assembly = Assembly.LoadFrom(path ^ name ^ ".dll") in
    let types = assembly.GetTypes() in                        
    let name_space = "HDFS_CSim_" ^ name ^ "." in
    let sim_type = assembly.GetType(name_space ^ "HDFS_Simulator_" ^ name) in
    let port_type = assembly.GetType(name_space ^ "HDFS_Port") in
    let porttype_type = assembly.GetType(name_space ^ "HDFS_PortType") in

    (* sim methods and fields *)
    let sim_methods = sim_type.GetMethods() in
    let cycle     = (Array.find (fun (m : MethodInfo) -> m.Name = "Cycle") sim_methods) in
    let reset     = (Array.find (fun (m : MethodInfo) -> m.Name = "Reset") sim_methods) in
    let port      = (Array.find (fun (m : MethodInfo) -> m.Name = "Port") sim_methods) in
    
    let mem       = sim_type.GetField("mem") in
    let mem_size  = sim_type.GetField("mem_size") in
    let ports     = sim_type.GetField("ports") in

    (* port data *)
    let port_name = port_type.GetField("name") in
    let port_data = port_type.GetField("data") in
    let port_bits = port_type.GetField("bits") in
    let port_type = port_type.GetField("type") in
    
    (* construct the object *)
    let sim = System.Activator.CreateInstance(sim_type) in

    (* get values from the simulator *)
    let mem       = mem.GetValue(sim) :?> uint32 array in
    let mem_size  = mem_size.GetValue(sim) :?> uint32 in
    let ports     = ports.GetValue(sim) :?> System.Object array in

    (* split ports into inputs,  outputs and wires *)
    let ports = List.of_array ports in
    let sel_port dir = List.filter (fun x -> (port_type.GetValue(x) :?> int) = dir) in
    let inputs = sel_port 0 ports in
    let wires = sel_port 1 ports in
    let outputs = sel_port 2 ports in
    
    let set_ports = 
      map (fun x ->
        let name = port_name.GetValue(x) :?> string in
        let bits = port_bits.GetValue(x) :?> int in
        let data = port_data.GetValue(x) :?> int in
        name, bits, data
      )
    in

    let inputs = set_ports inputs in
    let outputs = set_ports outputs in
    let wires = set_ports wires in

    (* copy inputs, outputs and wires *)
    
    let port_no = ref 100 in
    let new_port name width = 
      port_no := !port_no + 1;
      { port_uid = !port_no; port_name = name; port_data = Numeric.ArrayBits.make width } 
    in
    
    let write (name,bits,offset) = 
      let words = (bits+31)/32 in
      let p = new_port name bits in
      p, 
      (fun () -> 
        for i=0 to words-1 do
          mem.(i + offset) <- p.[i]
        done
      ) 
    in
    
    let read (name,bits,offset) = 
      let words = (bits+31)/32 in
      let p = new_port name bits in
      p,
      (fun () -> 
        for i=0 to words-1 do
          p.[i] <- mem.(i + offset)
        done
      ) 
    in
    
    let inputs, copy_inputs = split (map write inputs) in
    let outputs, copy_outputs = split (map read outputs) in
    let wires, copy_wires= split (map read wires) in

    let reset () = ignore (reset.Invoke(sim, null)) in
    let cycle () = 
        iter (fun x -> x()) copy_inputs;
        ignore (cycle.Invoke(sim, null));
        iter (fun x -> x()) copy_wires;
        iter (fun x -> x()) copy_outputs
    in
    {
      sim_circuit  = Circuit([],[],[],[],[],[],[],[],Set.empty,Set.empty,Set.empty,Map.empty,Map.empty,0);
      sim_reset    = reset;
      sim_cycle    = cycle;
      sim_inputs   = inputs;
      sim_wires    = wires;
      sim_outputs  = outputs;
      sim_port_map = fold_left (fun map (s:Port) -> Map.add s.uid s map) Map.empty (inputs @ wires @ outputs);
      sim_name_map = fold_left (fun map (s:Port) -> Map.add s.name s.uid map) Map.empty (inputs @ wires @ outputs);
      sim_data_map = Map.empty;
      sim_reg_map  = Map.empty;
      sim_mem_map  = Map.empty;
    }    
    
  with 
  | _ -> failwith "Cannot load simulation"
  
(** Calls the C# compiler *)
let csc args fnames = Simulator.execute_command ("csc") (args ^ " " ^ fold_strings " " fnames)

(** Writes a C# simulation, compiles it and builds a simulation object *)
let create_all_csim circuit path dut_name = 

  (* prepare the c# dll *)
  Circuit.write_file write_cs path dut_name ".cs" circuit;
  csc "/t:library" [ path ^ dut_name ^ ".cs" ];

  (* build the simulator *)
  create_csim path dut_name

